include constants.doh

///////////////////////////////////////////////////////////////////////
/// 1. For both all and weekend run our main "pooled" specifications //
///////////////////////////////////////////////////////////////////////

local days_specs = "all weekend"

foreach days_of_week in `days_specs' {

    // See the README for instructions on how to access this data
    import delimited "exposure_mvmt_timing_regressions_long_`days_of_week'.csv", clear

    // Make clean buckets
    encode age_bracket, gen(age_bracket_clean)

    // Make the age variables
    gen age_bracket_18_34 = age_bracket == "18-24" | age_bracket == "25-34"
    gen age_bracket_35_54 = age_bracket == "35-44" | age_bracket == "45-54"
    gen age_bracket_55 = age_bracket == "55-64" | age_bracket == "65+"

    // Make the zcta/county terciles
    gen top_tercile_zcta_inc = zcta_median_hh_inc_tercile == 1
    gen mid_tercile_zcta_inc = zcta_median_hh_inc_tercile == 2
    gen bot_tercile_zcta_inc = zcta_median_hh_inc_tercile == 3

    gen top_tercile_case100k = county_cases_per_100k_jh_tercile == 1
    gen mid_tercile_case100k = county_cases_per_100k_jh_tercile == 2
    gen bot_tercile_case100k = county_cases_per_100k_jh_tercile == 3

    // Make friend weighted measures
    egen fw_median_hh_inc_grp = cut(frnd_weighted_median_hh_inc), group(100)
    egen fw_pop_density_grp = cut(frnd_weighted_pop_density), group(100)
    egen fw_frac_urban_grp = cut(frnd_weighted_frac_urban), group(100)

    // Clean main LHS and RHS
    rename diff_share_days_no_mvmt d_share_no_mvmt
    replace d_share_no_mvmt = d_share_no_mvmt * 100

    rename pct_change_avg_daily_ pct_cg_avg_tiles
    replace pct_cg_avg_tiles = pct_cg_avg_tiles * 100

    // Make all the new necessary vars
    gen log_fwcXage_18_34 = d_log_fwc_100k*age_bracket_18_34
    gen log_fwcXage_35_54 = d_log_fwc_100k*age_bracket_35_54
    gen log_fwcXage_55 = d_log_fwc_100k*age_bracket_55

    gen log_fwcXfemale = d_log_fwc_100k*(is_female==1)
    gen log_fwcXmale = d_log_fwc_100k*(is_female==0)

    gen log_fwcXcollege = d_log_fwc_100k*(has_college==1)
    gen log_fwcXno_college = d_log_fwc_100k*(has_college==0)

    gen log_fwcXbot_inc = d_log_fwc_100k*bot_tercile_zcta_inc
    gen log_fwcXmid_inc = d_log_fwc_100k*mid_tercile_zcta_inc
    gen log_fwcXtop_inc = d_log_fwc_100k*top_tercile_zcta_inc

    gen log_fwcXbot_case100k = d_log_fwc_100k*bot_tercile_case100k
    gen log_fwcXmid_case100k = d_log_fwc_100k*mid_tercile_case100k
    gen log_fwcXtop_case100k = d_log_fwc_100k*top_tercile_case100k

    compress

    fegen interaction_group = group(home_zcta is_female has_college age_bracket_clean has_iphone has_tablet)


    ///// Baseline pooled results (Col 1 of Table 2) /////

    local rhs_vars = "d_log_fwc_100k"
    local lhs_vars = "d_share_no_mvmt pct_cg_avg_tiles"

    /// W/o weights

    foreach rhs in `rhs_vars' {

        foreach lhs in `lhs_vars' {

            eststo clear

            eststo panel1: reghdfe `lhs' `rhs', absorb(i.interaction_group#i.month fw_median_hh_inc_grp#i.month fw_pop_density_grp#i.month fw_frac_urban_grp#i.month) vce(cluster i.home_zcta) tol(1e-5)
            estadd ysumm

            eststo panel2: reghdfe `lhs' `rhs', absorb(i.userid i.interaction_group#i.month fw_median_hh_inc_grp#i.month fw_pop_density_grp#i.month fw_frac_urban_grp#i.month) vce(cluster i.home_zcta) tol(1e-5)
            estadd ysumm

            xml_tab panel*, save("output/tables/pooled_`lhs'___`rhs'__`days_of_week'") stats(N r2 ymean) replace below

        }
    }

    eststo clear

    /// W/ weights

    foreach rhs in `rhs_vars' {

        foreach lhs in `lhs_vars' {

            eststo clear

            eststo panel1: reghdfe `lhs' `rhs' [pweight=weight], absorb(i.interaction_group#i.month fw_median_hh_inc_grp#i.month fw_pop_density_grp#i.month fw_frac_urban_grp#i.month) vce(cluster i.home_zcta) tol(1e-5)
            estadd ysumm

            eststo panel2: reghdfe `lhs' `rhs' [pweight=weight], absorb(i.userid i.interaction_group#i.month fw_median_hh_inc_grp#i.month fw_pop_density_grp#i.month fw_frac_urban_grp#i.month) vce(cluster i.home_zcta) tol(1e-5)
            estadd ysumm

            xml_tab panel*, save("output/tables/pooled_`lhs'___`rhs'__`days_of_week'__weighted") stats(N r2 ymean) replace below

        }
    }


    ////// Make the "pooled binscatter" results (Figure A10) ///////

    local rhs = "d_log_fwc_100k"
    local lhs = "d_share_no_mvmt"
    local lhs_label = "Change in Share Days in Single Tile (%)"
    local rhs_label = "Change in log(FWC per 100k + 1)"

    /// Baseline ///

    preserve

        // Again we use Sergio's mata classes from REGHDFE to deman
        // Our raw mata commands will not be as smart about missing observations as reghdfe
        drop if `lhs' == . | `rhs' == .

        mata: HDFE = fixed_effects("i.interaction_group#i.month fw_median_hh_inc_grp#i.month fw_pop_density_grp#i.month fw_frac_urban_grp#i.month")
        mata: data = HDFE.partial_out("`lhs' `rhs'")
        mata: st_store(HDFE.sample, st_addvar("double", tokens("R_`lhs' R_`rhs'")), data)

        // Add back the means
        egen mean_`lhs' = mean(`lhs')
        egen mean_`rhs' = mean(`rhs')

        gen Rmean_`lhs' = R_`lhs' + mean_`lhs'
        gen Rmean_`rhs' = R_`rhs' + mean_`rhs'

        binscatter Rmean_`lhs' Rmean_`rhs', ytitle(`lhs_label') xtitle(`rhs_label') reportreg n(20) line(qfit) ytitle(, size(large)) ylabel(, labsize(large)) xtitle(, size(large)) xlabel(, labsize(large))

        graph export "output/figures/pooled_binscatter_`lhs'__`rhs'_`days_of_week'.pdf"

    restore

    /// Now add userid FEs ///

    preserve

        drop if `lhs' == . | `rhs' == .

        mata: HDFE = fixed_effects("i.userid i.interaction_group#i.month fw_median_hh_inc_grp#i.month fw_pop_density_grp#i.month fw_frac_urban_grp#i.month")
        mata: data = HDFE.partial_out("`lhs' `rhs'")
        mata: st_store(HDFE.sample, st_addvar("double", tokens("R_`lhs' R_`rhs'")), data)

        egen mean_`lhs' = mean(`lhs')
        egen mean_`rhs' = mean(`rhs')

        gen Rmean_`lhs' = R_`lhs' + mean_`lhs'
        gen Rmean_`rhs' = R_`rhs' + mean_`rhs'

        binscatter Rmean_`lhs' Rmean_`rhs', ytitle(`lhs_label') xtitle(`rhs_label') reportreg n(20) line(qfit) ytitle(, size(large)) ylabel(, labsize(large)) xtitle(, size(large)) xlabel(, labsize(large))

        graph export "output/tables/pooled_binscatter_userFEs_`lhs'__`rhs'_`days_of_week'.pdf"

    restore


    /////// "Pooled" heterogeneity results (Table 3) ////////

    eststo clear

    eststo panel1: reghdfe d_share_no_mvmt log_fwcXage_18_34 log_fwcXage_35_54 log_fwcXage_55, absorb(i.interaction_group#i.month fw_median_hh_inc_grp#i.month fw_pop_density_grp#i.month fw_frac_urban_grp#i.month) vce(cluster i.home_zcta) tol(1e-5)
    estadd ysumm

    eststo panel2: reghdfe d_share_no_mvmt log_fwcXfemale log_fwcXmale, absorb(i.interaction_group#i.month fw_median_hh_inc_grp#i.month fw_pop_density_grp#i.month fw_frac_urban_grp#i.month) vce(cluster i.home_zcta) tol(1e-5)
    estadd ysumm

    eststo panel3: reghdfe d_share_no_mvmt log_fwcXcollege log_fwcXno_college, absorb(i.interaction_group#i.month fw_median_hh_inc_grp#i.month fw_pop_density_grp#i.month fw_frac_urban_grp#i.month) vce(cluster i.home_zcta) tol(1e-5)
    estadd ysumm

    eststo panel4: reghdfe d_share_no_mvmt log_fwcXbot_inc log_fwcXmid_inc log_fwcXtop_inc, absorb(i.interaction_group#i.month fw_median_hh_inc_grp#i.month fw_pop_density_grp#i.month fw_frac_urban_grp#i.month) vce(cluster i.home_zcta) tol(1e-5)
    estadd ysumm

    eststo panel5: reghdfe d_share_no_mvmt log_fwcXbot_case100k log_fwcXmid_case100k log_fwcXtop_case100k, absorb(i.interaction_group#i.month fw_median_hh_inc_grp#i.month fw_pop_density_grp#i.month fw_frac_urban_grp#i.month) vce(cluster i.home_zcta) tol(1e-5)
    estadd ysumm

    eststo panel6: reghdfe d_share_no_mvmt log_fwc_1_25 log_fwc_26_50 log_fwc_51_75 log_fwc_76_100, absorb(i.interaction_group#i.month fw_median_hh_inc_grp#i.month fw_pop_density_grp#i.month fw_frac_urban_grp#i.month) vce(cluster i.home_zcta) tol(1e-5)
    estadd ysumm
    // F-test
    test log_fwc_1_25 = log_fwc_76_100
    estadd scalar F_top_bot = r(F)
    estadd scalar p_top_bot = r(p)

    xml_tab panel*, save("output/tables/hetero_pooled_d_share_no_mvmt___d_log_fwc_100k_`days_of_week'") stats(N r2 ymean F_top_bot p_top_bot) replace below
    eststo clear


    ////// Robustness pooled results (Table A10) ///////

    eststo clear

    // No controls
    eststo panel1: reghdfe d_share_no_mvmt d_log_fwc_100k, noabsorb tol(1e-5)
    estadd ysumm

    // Add Zip FEs
    eststo panel2: reghdfe d_share_no_mvmt d_log_fwc_100k, absorb(i.home_zcta#i.month) vce(cluster i.home_zcta) tol(1e-5)
    estadd ysumm

    // Zip FEs interacted with all demographic controls
    eststo panel3: reghdfe d_share_no_mvmt d_log_fwc_100k, absorb(i.interaction_group#i.month) vce(cluster i.home_zcta) tol(1e-5)
    estadd ysumm

    // Zip FEs interacted with all demographic controls + Exposure controls (in baseline table - can delete eventually)
    eststo panel4: reghdfe d_share_no_mvmt d_log_fwc_100k, absorb(i.interaction_group#i.month fw_median_hh_inc_grp#i.month fw_pop_density_grp#i.month fw_frac_urban_grp#i.month) vce(cluster i.home_zcta) tol(1e-5)
    estadd ysumm

    eststo panel5: reghdfe d_share_no_mvmt d_log_fwc_100k, absorb(i.userid i.interaction_group#i.month fw_median_hh_inc_grp#i.month fw_pop_density_grp#i.month fw_frac_urban_grp#i.month) vce(cluster i.home_zcta) tol(1e-5)
    estadd ysumm

    // Without the first month

    eststo panel6: reghdfe d_share_no_mvmt d_log_fwc_100k if month > 3, absorb(i.interaction_group#i.month fw_median_hh_inc_grp#i.month fw_pop_density_grp#i.month fw_frac_urban_grp#i.month) vce(cluster i.home_zcta) tol(1e-5)
    estadd ysumm

    eststo panel7: reghdfe d_share_no_mvmt d_log_fwc_100k if month > 3, absorb(i.userid i.interaction_group#i.month fw_median_hh_inc_grp#i.month fw_pop_density_grp#i.month fw_frac_urban_grp#i.month) vce(cluster i.home_zcta) tol(1e-5)
    estadd ysumm

    // Only the full panel

    eststo panel8: reghdfe d_share_no_mvmt d_log_fwc_100k if n_months_in_dat == 5, absorb(i.interaction_group#i.month fw_median_hh_inc_grp#i.month fw_pop_density_grp#i.month fw_frac_urban_grp#i.month) vce(cluster i.home_zcta) tol(1e-5)
    estadd ysumm

    eststo panel9: reghdfe d_share_no_mvmt d_log_fwc_100k if n_months_in_dat == 5, absorb(i.userid i.interaction_group#i.month fw_median_hh_inc_grp#i.month fw_pop_density_grp#i.month fw_frac_urban_grp#i.month) vce(cluster i.home_zcta) tol(1e-5)
    estadd ysumm

    xml_tab panel*, save("output/tables/robustness_pooled_d_share_no_mvmt___d_log_fwc_100k_`days_of_week'") stats(N r2 ymean) replace below

    /////// Make the deters results (Table A6) ////////

    // Make the frnd weighted median hh inc in terms of thousands
    gen fw_median_hh_inc = frnd_weighted_median_hh_inc/1000
    rename frnd_weighted_pop_density fw_pop_density
    rename frnd_weighted_frac_urban fw_frac_urban

    eststo clear

    eststo panel1: reghdfe d_log_fwc_100k age_bracket_35_54 age_bracket_55 is_female has_college has_iphone has_tablet mid_tercile_zcta_inc top_tercile_zcta_inc fw_median_hh_inc fw_pop_density fw_frac_urban, noabsorb vce(cluster i.home_zcta) tol(1e-5), ///
        if month == 3 & d_share_no_mvmt != .
    estadd ysumm

    eststo panel2: reghdfe d_log_fwc_100k age_bracket_35_54 age_bracket_55 is_female has_college has_iphone has_tablet mid_tercile_zcta_inc top_tercile_zcta_inc fw_median_hh_inc fw_pop_density fw_frac_urban, noabsorb vce(cluster i.home_zcta) tol(1e-5), ///
        if month == 4 & d_share_no_mvmt != .
    estadd ysumm

    eststo panel3: reghdfe d_log_fwc_100k age_bracket_35_54 age_bracket_55 is_female has_college has_iphone has_tablet mid_tercile_zcta_inc top_tercile_zcta_inc fw_median_hh_inc fw_pop_density fw_frac_urban, noabsorb vce(cluster i.home_zcta) tol(1e-5), ///
        if month == 5 & d_share_no_mvmt != .
    estadd ysumm

    eststo panel4: reghdfe d_log_fwc_100k age_bracket_35_54 age_bracket_55 is_female has_college has_iphone has_tablet mid_tercile_zcta_inc top_tercile_zcta_inc fw_median_hh_inc fw_pop_density fw_frac_urban, noabsorb vce(cluster i.home_zcta) tol(1e-5), ///
        if month == 6 & d_share_no_mvmt != .
    estadd ysumm

    eststo panel5: reghdfe d_log_fwc_100k age_bracket_35_54 age_bracket_55 is_female has_college has_iphone has_tablet mid_tercile_zcta_inc top_tercile_zcta_inc fw_median_hh_inc fw_pop_density fw_frac_urban, noabsorb vce(cluster i.home_zcta) tol(1e-5), ///
        if month == 7 & d_share_no_mvmt != .
    estadd ysumm

    eststo panel6: reghdfe d_log_fwc_100k age_bracket_35_54 age_bracket_55 is_female has_college has_iphone has_tablet, absorb(i.home_zcta fw_median_hh_inc_grp fw_pop_density_grp fw_frac_urban_grp) vce(cluster i.home_zcta) tol(1e-5), ///
        if month == 3 & d_share_no_mvmt != .
    estadd ysumm

    eststo panel7: reghdfe d_log_fwc_100k age_bracket_35_54 age_bracket_55 is_female has_college has_iphone has_tablet, absorb(i.home_zcta fw_median_hh_inc_grp fw_pop_density_grp fw_frac_urban_grp) vce(cluster i.home_zcta) tol(1e-5), ///
        if month == 4 & d_share_no_mvmt != .
    estadd ysumm

    eststo panel8: reghdfe d_log_fwc_100k age_bracket_35_54 age_bracket_55 is_female has_college has_iphone has_tablet, absorb(i.home_zcta fw_median_hh_inc_grp fw_pop_density_grp fw_frac_urban_grp) vce(cluster i.home_zcta) tol(1e-5), ///
        if month == 5 & d_share_no_mvmt != .
    estadd ysumm

    eststo panel9: reghdfe d_log_fwc_100k age_bracket_35_54 age_bracket_55 is_female has_college has_iphone has_tablet, absorb(i.home_zcta fw_median_hh_inc_grp fw_pop_density_grp fw_frac_urban_grp) vce(cluster i.home_zcta) tol(1e-5), ///
        if month == 6 & d_share_no_mvmt != .
    estadd ysumm

    eststo panel10: reghdfe d_log_fwc_100k age_bracket_35_54 age_bracket_55 is_female has_college has_iphone has_tablet, absorb(i.home_zcta fw_median_hh_inc_grp fw_pop_density_grp fw_frac_urban_grp) vce(cluster i.home_zcta) tol(1e-5), ///
        if month == 7 & d_share_no_mvmt != .
    estadd ysumm

    xml_tab panel*, save("output/tables/deters_of_d_log_fr_wgt_100k__`days_of_week'") stats(N r2 ymean F_top_bot p_top_bot) replace below
    eststo clear


}


//////////////////////////////////////////////////////////////////////////////////////////
//// 2. For two columns we need to have both social and physical distance in one table ///
//////////////////////////////////////////////////////////////////////////////////////////

// Specifically these will be used in Table 3 (last column), Table A13, and Table A14

foreach days_of_week in `days_specs' {

    // See the README for instructions on how to access this data
    import delimited "exposure_mvmt_timing_regressions_multi_dist_`days_of_week'.csv", clear

    // Make clean buckets
    encode age_bracket, gen(age_bracket_clean)

    // Make friend weighted measures
    egen fw_median_hh_inc_grp = cut(frnd_weighted_median_hh_inc), group(100)
    egen fw_pop_density_grp = cut(frnd_weighted_pop_density), group(100)
    egen fw_frac_urban_grp = cut(frnd_weighted_frac_urban), group(100)

    // Clean main LHS and RHS
    rename diff_share_days_no_mvmt d_share_no_mvmt
    replace d_share_no_mvmt = d_share_no_mvmt * 100

    rename pct_change_avg_daily_ pct_cg_avg_tiles
    replace pct_cg_avg_tiles = pct_cg_avg_tiles * 100

    fegen interaction_group = group(home_zcta is_female has_college age_bracket_clean has_iphone has_tablet)

    eststo clear

    // Without then with userid FEs

    eststo panel1: reghdfe d_share_no_mvmt close_log_fwc dist_log_fwc, absorb(i.interaction_group#i.month fw_median_hh_inc_grp#i.month fw_pop_density_grp#i.month fw_frac_urban_grp#i.month) vce(cluster i.home_zcta) tol(1e-5)
    estadd ysumm

    eststo panel2: reghdfe d_share_no_mvmt close_log_fwc_1_50 close_log_fwc_51_100 dist_log_fwc_1_50 dist_log_fwc_51_100, absorb(i.interaction_group#i.month fw_median_hh_inc_grp#i.month fw_pop_density_grp#i.month fw_frac_urban_grp#i.month) vce(cluster i.home_zcta) tol(1e-5)
    estadd ysumm

    xml_tab panel*, save("output/tables/cons_samp_dist__pooled_d_share_no_mvmt___d_log_fwc_100k_`days_of_week'") stats(N r2 ymean F_top_bot p_top_bot) replace below
    eststo clear
}


//////////////////////////////////////////////////////////////////////////
// 3. Make and use the "wide" file that produces the 'by-month' results //
//////////////////////////////////////////////////////////////////////////

// See the README for instructions on how to access this data
import delimited "exposure_mvmt_timing_regressions_wide.csv", clear

// Make clean buckets
encode age_bracket, gen(age_bracket_clean)

// Make the age variables
gen age_bracket_18_34 = age_bracket == "18-24" | age_bracket == "25-34"
gen age_bracket_35_54 = age_bracket == "35-44" | age_bracket == "45-54"
gen age_bracket_55 = age_bracket == "55-64" | age_bracket == "65+"

// Make the zcta/county terciles
gen top_tercile_zcta_inc = zcta_median_hh_inc_tercile == 1
gen mid_tercile_zcta_inc = zcta_median_hh_inc_tercile == 2
gen bot_tercile_zcta_inc = zcta_median_hh_inc_tercile == 3

gen top_tercile_case100k = county_cases_per_100k_jh_tercile == 1
gen mid_tercile_case100k = county_cases_per_100k_jh_tercile == 2
gen bot_tercile_case100k = county_cases_per_100k_jh_tercile == 3

// Make friend weighted measures
egen fw_median_hh_inc_grp = cut(frnd_weighted_median_hh_inc), group(100)
egen fw_pop_density_grp = cut(frnd_weighted_pop_density), group(100)
egen fw_frac_urban_grp = cut(frnd_weighted_frac_urban), group(100)

// Clean main LHS and RHS

local months = "march april may june july"

foreach month in `months' {

    rename `month'_diff_share_days_no_mvmt `month'_d_share_no_mvmt
    replace `month'_d_share_no_mvmt = `month'_d_share_no_mvmt * 100

    rename `month'_pct_change_avg_daily_ `month'_pct_cg_avg_tiles
    replace `month'_pct_cg_avg_tiles = `month'_pct_cg_avg_tiles * 100

    rename `month'_diff_log_frnd_weight `month'_d_log_fwc_100k

}

compress

fegen interaction_group = group(home_zcta is_female has_college age_bracket_clean has_iphone has_tablet)


///// Make the "adding month" results (Table 2 and Table A27) //////

local rhs = "d_log_fwc_100k"
local lhs = "d_share_no_mvmt"
local months = "march april may june july"

/// W/o weights
eststo clear
local curr_months_rhs = ""

foreach month in `months' {

    // Add the month to the RHS
    local curr_months_rhs = "`curr_months_rhs'`month'_`rhs' "

    eststo panel_month_`month': reghdfe `month'_`lhs' `curr_months_rhs', absorb(i.interaction_group fw_median_hh_inc_grp fw_pop_density_grp fw_frac_urban_grp) vce(cluster i.home_zcta) tol(1e-5)
    estadd ysumm

}

xml_tab panel*, save("output/tables/adding_months_`lhs'___`rhs''") stats(N r2 ymean F_top_bot p_top_bot) replace below
eststo clear

/// W/ weights
local curr_months_rhs = ""

foreach month in `months' {

    // Add the month to the RHS
    local curr_months_rhs = "`curr_months_rhs'`month'_`rhs' "

    eststo panel_month_`month': reghdfe `month'_`lhs' `curr_months_rhs' [pweight=weight], absorb(i.interaction_group fw_median_hh_inc_grp fw_pop_density_grp fw_frac_urban_grp) vce(cluster i.home_zcta) tol(1e-5)
    estadd ysumm

}

xml_tab panel*, save("output/tables/adding_months_`lhs'___`rhs'_weighted") stats(N r2 ymean F_top_bot p_top_bot) replace below
eststo clear


/////// Make the 'by month' results (Table A12) ///////

foreach month in `months' {

    eststo panel_month_`month': reghdfe `month'_`lhs' `month'_`rhs', absorb(i.interaction_group fw_median_hh_inc_grp fw_pop_density_grp fw_frac_urban_grp) vce(cluster i.home_zcta) tol(1e-5)
    estadd ysumm

}

xml_tab panel*, save("output/tables/by_month_`lhs'___`rhs'") stats(N r2 ymean F_top_bot p_top_bot) replace below
eststo clear
